suppressMessages(library(Matrix))
suppressMessages(library(SingleCellExperiment))
suppressMessages(library(biomaRt))
suppressMessages(library(scDblFinder))
suppressMessages(library(Seurat))
suppressMessages(library(ggplot2))
suppressMessages(library(scran))
suppressMessages(library(scater))
suppressMessages(library(ggpubr))
suppressMessages(library(sctransform))
srt <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/srt-total-dataset.rds")
sce <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/sce-total.rds")
assays(sce)$logcounts <- NULL
assays(sce)$limma <- NULL
assays(sce)$limma_dataset <- NULL
df <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/qc-df.rds")
keep_macrophages <- srt$RNA_snn_res.0.1
keep_macrophages <- keep_macrophages %in% c(0,3)
srt <- srt[,keep_macrophages]
sce <- sce[, keep_macrophages]
df <- df[keep_macrophages,]
df
logmat <- log2(counts(sce)+1)
df1 <- data.frame(total_counts = df$library_size,
                 total_features = df$total_features,
                 sample = sce$sample,
                 dataset = sce$dataset,
                 sorting = sce$sorting,
                 condition = sce$condition)

df2 <- data.frame(log10_total_counts = log10(df$library_size),
                  log10_total_features = log10(df$total_features),
                  dataset = sce$dataset,
                  sorting = sce$sorting,
                  sample = sce$sample)
varMatrix <- getVarianceExplained(logmat, variables = df1)
varMatrix[1:5,]
##               total_counts total_features     sample    dataset    sorting
## a               7.13906359     7.12053327 9.49776131 9.36763252 9.36786206
## A030001D20Rik   0.25132938     0.18967759 0.04015338 0.01254543 0.01661120
## A030005L19Rik   0.01396117     0.02128989 0.15640516 0.08617376 0.09013159
## A030014E15Rik   0.01116884     0.01044194 0.04226329 0.01235296 0.01819649
## A130010J15Rik   2.87287706     2.76662351 0.46264263 0.41429479 0.42324135
##                condition
## a             7.20714815
## A030001D20Rik 0.02399383
## A030005L19Rik 0.08014567
## A030014E15Rik 0.01063593
## A130010J15Rik 0.27353676
          plotExplanatoryVariables(
            varMatrix,
            variables = variables)  + 
  ggtitle("Macrophages after QC") +
  theme(text = element_text(size=20)) +
 scale_color_manual(values=c( "dodgerblue" ,"purple" , "green3","orange", "blue", "red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 1182 rows containing non-finite values (`stat_density()`).

make_barplot <- function(df, x, y, title, ylab){
  ggplot(df, aes(x=x, y=y, fill=x)) +
  
  geom_violin(show.legend = FALSE) + 
  theme_bw() +
  theme(text = element_text(size=20), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ylab(ylab) + xlab("Sample") +
  scale_fill_manual(values=sample_colors) +
    ggtitle(title)
}
sample_colors <- c("blue", "green", "orange", "yellow3", "pink", "red", "dodgerblue", "coral1",
                   "purple", "magenta", "cyan", "grey")
make_barplot(df1, df1$sample, df1$total_counts, "All samples after QC", "Library size")

make_barplot(df2, df2$sample, df2$log10_total_counts, "All samples after QC", "Library size (log10)")

make_barplot(df1, df1$sample, df1$total_features, "All samples after QC", "Total features")

make_barplot(df2, df2$sample, df2$log10_total_features, "All samples after QC", "Total features (log10)")

make_barplot(df2, df2$dataset, df2$log10_total_counts, "All samples after QC", "Library size (log10)")

make_barplot(df2, df2$dataset, df2$log10_total_features, "All samples after QC", "Total features (log10)")

make_barplot(df2, df2$sorting, df2$log10_total_counts, "All samples after QC", "Library size (log10)")

make_barplot(df2, df2$sorting, df2$log10_total_features, "All samples after QC", "Total features (log10)")

Remove noise:

keep_genes <- rowSums(counts(sce)) > 10
sce <- sce[keep_genes,]
qclust <- quickCluster(sce)
sce <- computeSumFactors(sce, clusters=qclust)
summary(sizeFactors(sce))   
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1216  0.4972  0.6738  1.0000  1.1161 15.6796
sce <- logNormCounts(sce, 
                     size_factors = sizeFactors(sce),
                     log = TRUE,
                     pseudo_count = 1, 
                     exprs_values = "counts"
                     )
mat <- assays(sce)$logcounts
total_counts <- colSums((2**mat)-1)
total_counts[1:5]
## AAACCCAAGTGGAAGA-1_mct1 AAACCCACAGTGTGGA-1_mct1 AAACCCAGTTGGTACT-1_mct1 
##                14192.04                14148.52                10632.85 
## AAACGAAAGATCACTC-1_mct1 AAACGAACACGGATCC-1_mct1 
##                15242.36                14627.17
df3 <- data.frame(total_counts = total_counts,
                 total_features = df$total_features,
                 sample = sce$sample,
                 sorting = sce$sorting,
                 dataset = sce$dataset,
                 condition = sce$condition)
df4 <- data.frame(log10_total_counts = log10(total_counts),
                    log10_total_features = log10(df$total_features),
                  sample = sce$sample,
                  dataset = sce$dataset,
                  sorting = sce$sorting)
make_barplot(df3, df3$sample, df3$total_counts, "All samples", "Library size") 

make_barplot(df4, df4$sample, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)

make_barplot(df3, df3$sorting, df3$total_counts, "All samples", "Library size") 

make_barplot(df4, df4$sorting, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)

make_barplot(df3, df3$dataset, df3$total_counts, "All samples", "Library size") 

make_barplot(df4, df4$dataset, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)

normVarMatrix <- getVarianceExplained(assays(sce)$logcounts, variables = df3[,c("total_counts", "total_features", "sample", "condition", "sorting", "dataset")])
          plotExplanatoryVariables(
            normVarMatrix,
            variables = variables)  + 
  ggtitle("All samples") +
  theme(text = element_text(size=20)) +
 scale_color_manual(values=c( "green3","purple" ,"orange", "dodgerblue" , "blue", "red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

#saveRDS(sce, "/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/macrophage_subset/data/sce-macrophages.rds")
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] sctransform_0.3.5           ggpubr_0.5.0               
##  [3] scater_1.22.0               scran_1.22.1               
##  [5] scuttle_1.4.0               ggplot2_3.4.2              
##  [7] SeuratObject_4.1.3          Seurat_4.1.1               
##  [9] scDblFinder_1.8.0           biomaRt_2.50.3             
## [11] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0              GenomicRanges_1.46.1       
## [15] GenomeInfoDb_1.30.1         IRanges_2.28.0             
## [17] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## [19] MatrixGenerics_1.6.0        matrixStats_0.63.0         
## [21] Matrix_1.5-3               
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.4                reticulate_1.26          
##   [3] tidyselect_1.2.0          RSQLite_2.2.20           
##   [5] AnnotationDbi_1.56.2      htmlwidgets_1.6.4        
##   [7] grid_4.1.2                BiocParallel_1.28.3      
##   [9] Rtsne_0.16                munsell_0.5.0            
##  [11] ScaledMatrix_1.2.0        codetools_0.2-18         
##  [13] ica_1.0-3                 statmod_1.5.0            
##  [15] xgboost_1.7.3.1           future_1.31.0            
##  [17] miniUI_0.1.1.1            withr_3.0.0              
##  [19] spatstat.random_3.1-3     colorspace_2.1-0         
##  [21] progressr_0.13.0          filelock_1.0.2           
##  [23] highr_0.10                knitr_1.45               
##  [25] rstudioapi_0.15.0         ROCR_1.0-11              
##  [27] ggsignif_0.6.4            tensor_1.5               
##  [29] listenv_0.9.0             labeling_0.4.3           
##  [31] GenomeInfoDbData_1.2.7    polyclip_1.10-4          
##  [33] farver_2.1.1              bit64_4.0.5              
##  [35] parallelly_1.34.0         vctrs_0.6.5              
##  [37] generics_0.1.3            xfun_0.42                
##  [39] BiocFileCache_2.2.1       R6_2.5.1                 
##  [41] ggbeeswarm_0.7.1          rsvd_1.0.5               
##  [43] locfit_1.5-9.7            bitops_1.0-7             
##  [45] spatstat.utils_3.0-1      cachem_1.0.8             
##  [47] DelayedArray_0.20.0       assertthat_0.2.1         
##  [49] promises_1.2.1            scales_1.3.0             
##  [51] beeswarm_0.4.0            gtable_0.3.4             
##  [53] beachmat_2.10.0           globals_0.16.2           
##  [55] goftest_1.2-3             rlang_1.1.3              
##  [57] splines_4.1.2             rstatix_0.7.2            
##  [59] lazyeval_0.2.2            broom_1.0.5              
##  [61] spatstat.geom_3.0-6       yaml_2.3.8               
##  [63] reshape2_1.4.4            abind_1.4-5              
##  [65] backports_1.4.1           httpuv_1.6.14            
##  [67] tools_4.1.2               ellipsis_0.3.2           
##  [69] spatstat.core_2.4-4       jquerylib_0.1.4          
##  [71] RColorBrewer_1.1-3        ggridges_0.5.6           
##  [73] Rcpp_1.0.12               plyr_1.8.9               
##  [75] sparseMatrixStats_1.6.0   progress_1.2.2           
##  [77] zlibbioc_1.40.0           purrr_1.0.2              
##  [79] RCurl_1.98-1.10           prettyunits_1.1.1        
##  [81] rpart_4.1.19              deldir_1.0-6             
##  [83] pbapply_1.7-0             viridis_0.6.2            
##  [85] cowplot_1.1.1             zoo_1.8-12               
##  [87] ggrepel_0.9.5             cluster_2.1.4            
##  [89] magrittr_2.0.3            data.table_1.15.2        
##  [91] scattermore_0.8           lmtest_0.9-40            
##  [93] RANN_2.6.1                fitdistrplus_1.1-8       
##  [95] hms_1.1.2                 patchwork_1.2.0          
##  [97] mime_0.12                 evaluate_0.23            
##  [99] xtable_1.8-4              XML_3.99-0.16.1          
## [101] gridExtra_2.3             compiler_4.1.2           
## [103] tibble_3.2.1              KernSmooth_2.23-20       
## [105] crayon_1.5.2              htmltools_0.5.7          
## [107] mgcv_1.8-41               later_1.3.2              
## [109] tidyr_1.3.1               DBI_1.1.3                
## [111] dbplyr_2.3.0              MASS_7.3-58.2            
## [113] rappdirs_0.3.3            car_3.1-1                
## [115] cli_3.6.2                 parallel_4.1.2           
## [117] metapod_1.2.0             igraph_1.3.5             
## [119] pkgconfig_2.0.3           sp_1.6-0                 
## [121] plotly_4.10.1             spatstat.sparse_3.0-0    
## [123] xml2_1.3.3                vipor_0.4.5              
## [125] bslib_0.6.1               dqrng_0.3.0              
## [127] XVector_0.34.0            stringr_1.5.1            
## [129] digest_0.6.34             RcppAnnoy_0.0.20         
## [131] spatstat.data_3.0-0       Biostrings_2.62.0        
## [133] rmarkdown_2.26            leiden_0.4.3             
## [135] uwot_0.1.14               edgeR_3.36.0             
## [137] DelayedMatrixStats_1.16.0 curl_5.2.1               
## [139] shiny_1.8.0               nlme_3.1-162             
## [141] lifecycle_1.0.4           jsonlite_1.8.8           
## [143] carData_3.0-5             BiocNeighbors_1.12.0     
## [145] viridisLite_0.4.2         limma_3.50.3             
## [147] fansi_1.0.6               pillar_1.9.0             
## [149] lattice_0.20-45           KEGGREST_1.34.0          
## [151] fastmap_1.1.1             httr_1.4.4               
## [153] survival_3.5-0            glue_1.7.0               
## [155] png_0.1-8                 bluster_1.4.0            
## [157] bit_4.0.5                 stringi_1.8.3            
## [159] sass_0.4.8                blob_1.2.3               
## [161] BiocSingular_1.10.0       memoise_2.0.1            
## [163] dplyr_1.1.4               irlba_2.3.5.1            
## [165] future.apply_1.10.0